library(ggplot2)
library(ggpubr)
## Loading required package: magrittr
library(Seurat)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
folder="~/corona_project/plots_intigrate/"
### initilization
cell_type_genes=list("Bowman’s glands"=c("SOX9", "SOX10", "GPX3"),
"olfactory HBCs"=c("TP63", "KRT5", "CXCL14", "SOX2", "MEG3"),
"olfactory ensheathing glia"=c("S100B", "PLP1", "PMP2","MPZ", "ALX3"),
"olfactory microvillar cells"=c("ASCL3", "CFTR", "HEPACAM2", "FOXL1"),
"immature neurons"=c("GNG8", "OLIG2", "EBF2", "LHX2", "CBX8"),
"mature neurons"=c("GNG13", "EBF2", "CBX8", "RTP1"),
"GBCs"=c("HES6", "ASCL1", "CXCR4", "SOX2", "EZH2", "NEUROD1", "NEUROG1"),
"sustentacular cells"=c("CYP2A13", "CYP2J2", "GPX6", "ERMN", "SOX2"))
cell_type_genes_temp=list("Bowman’s glands"=c("SOX9", "GPX3"),
"olfactory HBCs"=c("CXCL14", "MEG3"),
"olfactory ensheathing glia"=c("S100B", "PLP1"),
"olfactory microvillar cells"=c( "ASCL3", "HEPACAM2"),
"immature neurons"=c("GNG8","OLIG2"),
"mature neurons"=c("GNG13","RTP1"),
"GBCs"=c("CXCR4", "SOX2"),
"sustentacular cells"=c("CYP2A13", "ERMN"))
marker_genes=c("SOX9", "SOX10", "GPX3",
"TP63", "KRT5", "CXCL14", "SOX2", "MEG3",
"S100B", "PLP1", "PMP2","MPZ", "ALX3",
"ASCL3", "CFTR", "HEPACAM2", "FOXL1",
"GNG8", "OLIG2", "EBF2", "LHX2", "CBX8",
"GNG13", "EBF2", "CBX8", "RTP1",
"HES6", "ASCL1", "CXCR4", "SOX2", "EZH2", "NEUROD1", "NEUROG1",
"CYP2A13", "CYP2J2", "GPX6", "ERMN", "SOX2")
marker_genes_temp=c("SOX9", "GPX3","CXCL14", "MEG3","S100B", "PLP1", "ASCL3", "HEPACAM2",
"GNG8","OLIG2","GNG13","RTP1","CXCR4", "SOX2","CYP2A13", "ERMN")
cell_type_names=c("Bowman’s glands",
"olfactory HBCs",
"olfactory ensheathing glia",
"olfactory microvillar cells",
"immature neurons",
"mature neurons",
"GBCs",
"sustentacular cells")
gene_path=c("LNPEP", "ACE", "CD143","PRCP","CTSG",
"PREP", "KLK1_2","CMA1","REN", "MME",
"THOP1","NLN","AGTR1","AGTR2","MAS1",
" MRGPRDCPA3","ACE2","AGT","ANPEP","ENPEP","CTSA","ATP6AP2")
###intigration and batch effect removing using Seurat
batch_list=list("P2","P3")
batch_data_list=list("P2"=1,"P3"=1)
for( i in 1:length(batch_list))
{
print(batch_list[[i]])
s_object=Read10X(paste("~/corona_project/Input_files/",batch_list[[i]],sep=""))
s_object=CreateSeuratObject(counts =s_object, min.cells = 0, min.features = 400, project = "P23")
s_object[["percent.mt"]] <- PercentageFeatureSet(s_object, pattern = "^MT-")
s_object <- subset(s_object, subset = nFeature_RNA >100 & nFeature_RNA <8000 & percent.mt <10)
s_object@meta.data[, "run"] <- batch_list[i]
s_object=NormalizeData(s_object)
batch_data_list[[i]]=FindVariableFeatures(s_object, selection.method = "vst", nfeatures =5000)
}
## [1] "P2"
## [1] "P3"
batch_data_list
## $P2
## An object of class Seurat
## 33538 features across 10881 samples within 1 assay
## Active assay: RNA (33538 features)
##
## $P3
## An object of class Seurat
## 33538 features across 5415 samples within 1 assay
## Active assay: RNA (33538 features)
saveRDS(batch_data_list,"~/corona_project/batch_data_list_5000_23.rds")
set.seed(1)
###intigration and batch effect removing using Seurat
run.anchors <- FindIntegrationAnchors(object.list = batch_data_list, dims = 1:30,anchor.features = 5000)
## Computing 5000 integration features
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 11489 anchors
## Filtering anchors
## Retained 8787 anchors
## Extracting within-dataset neighbors
a=run.anchors
run.combined <- IntegrateData(anchorset = run.anchors, dims = 1:30)
## Merging dataset 2 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Warning: Adding a command log without an assay associated with it
b=run.combined
sce_3_1_1_before=run.combined
saveRDS(sce_3_1_1_before,"~/corona_project/sce_3_1_1_before_5000_23.rds")
### Dim reduction and clustering
DefaultAssay(run.combined) <- "integrated"
run.combined <- ScaleData(run.combined, verbose = FALSE)
run.combined <- RunPCA(run.combined, npcs = 30, verbose = FALSE)
run.combined <- RunUMAP(run.combined, reduction = "pca", dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 18:20:34 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:20:34 Read 16296 rows and found 30 numeric columns
## 18:20:34 Using Annoy for neighbor search, n_neighbors = 30
## 18:20:34 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:20:36 Writing NN index file to temp file /tmp/RtmpwdgpNE/file5bbf758358f2
## 18:20:36 Searching Annoy index using 1 thread, search_k = 3000
## 18:20:40 Annoy recall = 100%
## 18:20:41 Commencing smooth kNN distance calibration using 1 thread
## 18:20:41 Initializing from normalized Laplacian + noise
## 18:20:43 Commencing optimization for 200 epochs, with 707164 positive edges
## 18:20:48 Optimization finished
run.combined <- FindNeighbors(run.combined, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
run.combined <- FindClusters(run.combined, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 16296
## Number of edges: 654023
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9526
## Number of communities: 22
## Elapsed time: 2 seconds
### cell-types annotation
for(i in 0:as.integer(length(marker_genes)/4))
{
a=(i*4+1)
b=((i+1)*4)
if(i==as.integer(length(marker_genes)/4))
b=length(marker_genes)
if(a>length(marker_genes))
next
print(i)
print(VlnPlot(run.combined, features = marker_genes[a:b],ncol=2))
print(FeaturePlot(run.combined, features = marker_genes[a:b],cols=c("lightgrey", "red"),label = TRUE,pt.size=2,order = TRUE))
}
## [1] 0
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.


## [1] 1


## [1] 2


## [1] 3

## [1] 4
## Warning: Could not find FOXL1 in the default search locations, found in RNA
## assay instead

## Warning: Could not find FOXL1 in the default search locations, found in RNA
## assay instead

## [1] 5
## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## [1] 6
## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead


## [1] 7


## [1] 8


## [1] 9


run.combined<-RenameIdents(run.combined,`13`="Immature neurons", `14`="Mature neurons", `3`="Olfactory HBCs",`19`="Olfactory microvillar cells",
`5`="Bowman's gland", `16`="Olfactory ensheathing glia", `9`="Sustentacular cells",`20`="GBCs" )
run.combined=subset(run.combined, idents = c("Immature neurons","Mature neurons","Olfactory HBCs","Olfactory microvillar cells","Bowman's gland",
"Olfactory ensheathing glia","Sustentacular cells","GBCs"))
sce_3_1_1_after=run.combined
saveRDS(sce_3_1_1_after,"~/corona_project/sce_3_1_1_after_5000_23.rds")
### Dim plot to show clusters clearly for each cell type
DimPlot(run.combined, reduction = "umap")

### FeaturePlot to show the expression of ACE2 and TMPRSS2 in each cell type
FeaturePlot(run.combined, features = "ACE2",combine = FALSE,cols=c("lightgrey", "red"),pt.size=2,order =TRUE)
## Warning: Could not find ACE2 in the default search locations, found in RNA assay
## instead
## [[1]]

FeaturePlot(run.combined, features = "TMPRSS2",combine = FALSE,cols=c("lightgrey", "red"),pt.size=2,order = TRUE)
## [[1]]

### Percentage of ACE2 expressed's cells in each cell type
a=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])])
print(sum(a))
## [1] 32
a=a*100/sum(a)
df=data.frame(cell_type=names(a),count_percentage=a)
ggplot(data=df, aes(x=cell_type, y=count_percentage.Freq, fill=cell_type)) +
geom_bar(stat="identity") + theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")

dim(run.combined@assays$RNA)
## [1] 33538 3906
### Percentage of TMPRSS2 expressed's cells in each cell type
a=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])])
print(sum(a))
## [1] 666
a=a*100/sum(a)
df=data.frame(cell_type=names(a),count_percentage=a)
ggplot(data=df, aes(x=cell_type, y=count_percentage.Freq, fill=cell_type)) +
geom_bar(stat="identity") + theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")

dim(run.combined@assays$RNA)
## [1] 33538 3906
### bar plot of union of both ACE2 and TMPRSS2
col_name_TMPRSS2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])]
col_name_ACE2=Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])]
union_names=union(names(col_name_ACE2),names(col_name_TMPRSS2))
union_cells=table(Seurat::Idents(run.combined)[union_names])
TMPRSS2_cells=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["TMPRSS2",])>0])])
ACE2_cells=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])])
new_data=data.frame("cells_percenrage"=c(((TMPRSS2_cells/union_cells)*100),((ACE2_cells/union_cells)*100)),
genes=c(rep("TMPRSS2",length(TMPRSS2_cells)),rep("ACE2",length(ACE2_cells))),
cell_types=c(names(TMPRSS2_cells),names(ACE2_cells)))
ggplot(data=new_data, aes(x=cell_types, y=cells_percenrage, fill=genes)) +
geom_bar(stat="identity") + theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))

### with common cells also
common_names=intersect(names(col_name_ACE2),names(col_name_TMPRSS2))
common_cells=table(Seurat::Idents(run.combined)[common_names])
svg(paste(folder,"bar_TMPRSS2_ACE2.svg",sep=""))
new_data=data.frame("cells_percenrage"=c(((TMPRSS2_cells/union_cells)*100),((ACE2_cells/union_cells)*100),((common_cells/union_cells)*100)),
genes=c(rep("TMPRSS2",length(TMPRSS2_cells)),rep("ACE2",length(ACE2_cells)),rep("common",length(common_cells))),
cell_types=c(names(TMPRSS2_cells),names(ACE2_cells),names(common_cells)))
ggplot(data=new_data, aes(x=cell_types, y=cells_percenrage, fill=genes)) +
geom_bar(stat="identity") + theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))
### Dot plot cluster wise with padj values of ACE2 and TMPRSS2
DotPlot(run.combined,features = c("ACE2","TMPRSS2"),cols = c("lightgrey", "red",dot.scale = 10))
## Warning: Could not find ACE2 in the default search locations, found in RNA assay
## instead
### Finding differential expressed genes (HBC+;ACE2+) vs B (HBC+;ACE2-)
DefaultAssay(run.combined) <- "RNA"
HBC=subset(run.combined, idents = "Olfactory HBCs")
pos=c(rep(1,length(colnames(HBC[,as.matrix(HBC@assays$RNA["ACE2",])>0]))))
names(pos)=colnames(HBC[,as.matrix(HBC@assays$RNA["ACE2",])>0])
neg=c(rep(-1,length(colnames(HBC[,as.matrix(HBC@assays$RNA["ACE2",])<=0]))))
names(neg)=colnames(HBC[,as.matrix(HBC@assays$RNA["ACE2",])<=0])
pos_neg=c(pos,neg)
sum(pos_neg[which(names(pos_neg)%in%rownames(HBC@meta.data))]==pos_neg)
## [1] 1087
HBC=AddMetaData(
object = HBC,
metadata = pos_neg,
col.name = 'HBC'
)
Idents(HBC)=pos_neg
sum(pos_neg[which(names(pos_neg)%in%names(Idents(HBC)))]==pos_neg)
## [1] 1087
HBC_genes=FindMarkers(HBC, ident.1=1,ident.2=-1,test.use = "wilcox")
HBC_genes_a=rownames(HBC_genes)[HBC_genes[2]>1 & HBC_genes[1]<0.05]
HBC_genes_b=rownames(HBC_genes)[HBC_genes[2]<-1 & HBC_genes[1]<0.05]
HBC_genes_names=c(HBC_genes_a,HBC_genes_b)
### Finding differential expressed genes (Sustentacular+;ACE2+) vs B (Sustentacular+;ACE2-)
Sustentacular=subset(run.combined, idents = "Sustentacular cells")
pos=c(rep(1,length(colnames(Sustentacular[,as.matrix(Sustentacular@assays$RNA["ACE2",])>0]))))
names(pos)=colnames(Sustentacular[,as.matrix(Sustentacular@assays$RNA["ACE2",])>0])
neg=c(rep(-1,length(colnames(Sustentacular[,as.matrix(Sustentacular@assays$RNA["ACE2",])<=0]))))
names(neg)=colnames(Sustentacular[,as.matrix(Sustentacular@assays$RNA["ACE2",])<=0])
pos_neg=c(pos,neg)
sum(pos_neg[which(names(pos_neg)%in%rownames(Sustentacular@meta.data))]==pos_neg)
## [1] 737
Sustentacular=AddMetaData(
object = Sustentacular,
metadata = pos_neg,
col.name = 'Sustentacular'
)
Idents(Sustentacular)=pos_neg
sum(pos_neg[which(names(pos_neg)%in%names(Idents(Sustentacular)))]==pos_neg)
## [1] 737
Sustentacular_genes=FindMarkers(Sustentacular, ident.1=1,ident.2=-1,test.use = "wilcox")
Sustentacular_genes_a=rownames(Sustentacular_genes)[Sustentacular_genes[2]>1 & Sustentacular_genes[1]<0.05]
Sustentacular_genes_b=rownames(Sustentacular_genes)[Sustentacular_genes[2]<-1 & Sustentacular_genes[1]<0.05]
Sustentacular_genes_names=c(Sustentacular_genes_a,Sustentacular_genes_b)
### Saving DE genes
write.csv(HBC_genes_a,paste(folder,"HBC_genes_a.csv",sep=""))
write.csv(HBC_genes_b,paste(folder,"HBC_genes_b.csv",sep=""))
write.csv(Sustentacular_genes_a,paste(folder,"Sustentacular_genes_a.csv",sep=""))
write.csv(Sustentacular_genes_b,paste(folder,"Sustentacular_genes_b.csv",sep=""))
## Supplementary figures
### Do heat map
sce_3_1_1_after_5000_23 <- readRDS("~/corona_project/sce_3_1_1_after_5000_23.rds")
cluster.averages <- AverageExpression(sce_3_1_1_after_5000_23, return.seurat = TRUE)
## Finished averaging RNA for cluster Immature neurons
## Finished averaging RNA for cluster Mature neurons
## Finished averaging RNA for cluster Olfactory HBCs
## Finished averaging RNA for cluster Olfactory microvillar cells
## Finished averaging RNA for cluster Bowman's gland
## Finished averaging RNA for cluster Olfactory ensheathing glia
## Finished averaging RNA for cluster Sustentacular cells
## Finished averaging RNA for cluster GBCs
## Finished averaging integrated for cluster Immature neurons
## Finished averaging integrated for cluster Mature neurons
## Finished averaging integrated for cluster Olfactory HBCs
## Finished averaging integrated for cluster Olfactory microvillar cells
## Finished averaging integrated for cluster Bowman's gland
## Finished averaging integrated for cluster Olfactory ensheathing glia
## Finished averaging integrated for cluster Sustentacular cells
## Finished averaging integrated for cluster GBCs
## Centering and scaling data matrix
DoHeatmap(sce_3_1_1_after_5000_23,features =c(marker_genes_temp),size = 3, draw.lines = FALSE )
DoHeatmap(cluster.averages,features =c(marker_genes),size = 3, draw.lines = FALSE )
## Warning in DoHeatmap(cluster.averages, features = c(marker_genes), size = 3, :
## The following features were omitted as they were not found in the scale.data
## slot for the integrated assay: CBX8, FOXL1
### Dim plot to show clusters clearly for each cell type
plot=list()
for(i in 1:length(marker_genes_temp))
{
print(i)
if(i %in% c(1,5,9))
plot[[i]]<-VlnPlot(sce_3_1_1_after_5000_23, features = marker_genes_temp[[i]]) +
NoLegend() + theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.line.x = element_blank())
if(i %in% c(14,15,16))
plot[[i]]<-VlnPlot(sce_3_1_1_after_5000_23, features = marker_genes_temp[[i]]) +
NoLegend() + theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.line.y = element_blank())
if(i %in% c(13))
plot[[i]]<-print(VlnPlot(sce_3_1_1_after_5000_23, features = marker_genes_temp[[i]])) +
NoLegend()
if(i %in% c(2,3,4,6,7,8,10,11,12))
plot[[i]]<-VlnPlot(sce_3_1_1_after_5000_23, features = marker_genes_temp[[i]]) +
NoLegend()+ theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.line.x = element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.line.y = element_blank())
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
cowplot::plot_grid(plotlist = plot,nrow=4,rel_heights = c(1,1,1,2))
### for FeaturePlot
plot=list()
plot[[1]]=DimPlot(sce_3_1_1_after_5000_23, reduction = "umap",pt.size = 1,label=TRUE)+
labs(title = "") + NoAxes() + NoLegend()
for(i in 1:length(marker_genes_temp))
{
print(i)
plot[[i+1]]<-FeaturePlot(sce_3_1_1_after_5000_23, features = marker_genes_temp[[i]],cols=c("lightgrey", "red"),
pt.size=1,order = TRUE) + NoLegend() + NoAxes()
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
AT=cowplot::plot_grid(plotlist = plot[c(3,4,5)],ncol=3)
AB=cowplot::plot_grid(plotlist = plot[c(12,13,11)],ncol=3)
AR=cowplot::plot_grid(plotlist = plot[c(6,7,8,9,10)],nrow=5)
AL=cowplot::plot_grid(plotlist = plot[c(2,14,15,16,17)],nrow=5)
AC=cowplot::plot_grid(plotlist = plot[c(1)],nrow=1)
ATCB=cowplot::plot_grid(plotlist = list(AT,AC,AB),nrow=3,rel_heights = c(1,4,1))
cowplot::plot_grid(plotlist = list(AL,ATCB,AR),ncol=3,rel_widths = c(1,2,1))